# 单变量线性回归
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

plotly.offline.init_notebook_mode()

from linear_regression import LinearRegression

# 读取数据
data = pd.read_csv('data/gdp.csv')
# 划分数据集
"""
使用sample()函数从data中随机抽取80%的样本作为训练集，
并将结果赋值给train_data变量。frac = 0.8表示抽取的比例为80%，
random_state = 0表示设置随机种子，保证每次运行代码时得到的随机结果一致
"""
train_data = data.sample(frac=0.8, random_state=0)
"""
使用drop()函数删除data中在train_data索引中出现的行，
剩下的行即为测试集，并将结果赋值给test_data变量。
train_data.index表示train_data的索引
"""
test_data = data.drop(train_data.index)

# 指定传进来的特征和标签
# 输入特征 X1
input_param_name_1 = 'GDP'
# 输入特征 X2
input_param_name_2 = 'Freedom'
# 输出特征 Y
output_param_name = '幸福指数'
# 基于指定列名获取具体数据
"""
报错：'numpy.float64' object does not support item assignment
原因：x_train和y_train的值是一维数组[]格式
解决：将x_train和y_train的格式改变为二维数组[[]]
"""
x_train = train_data[[input_param_name_1, input_param_name_2]].values
y_train = train_data[[output_param_name]].values

x_test = test_data[[input_param_name_1, input_param_name_2]].values
y_test = test_data[[output_param_name]].values
# 查看数据情况
plot_training_trace = go.Scatter3d(
    x=x_train[:, 0].flatten(),
    y=x_train[:, 1].flatten(),
    z=y_train.flatten(),
    name='Training Set',
    mode='markers',
    marker={
        'size': 10,
        'opacity': 1,
        'line': {
            'color': 'rgb(255, 255, 255)',
            'width': 1
        },
    }
)

plot_test_trace = go.Scatter3d(
    x=x_test[:, 0].flatten(),
    y=x_test[:, 1].flatten(),
    z=y_test.flatten(),
    name='Test Set',
    mode='markers',
    marker={
        'size': 10,
        'opacity': 1,
        'line': {
            'color': 'rgb(255, 255, 255)',
            'width': 1
        },
    }
)

plot_layout = go.Layout(
    title='Date Sets',
    scene={
        'xaxis': {'title': input_param_name_1},
        'yaxis': {'title': input_param_name_2},
        'zaxis': {'title': output_param_name}
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)

plot_data = [plot_training_trace, plot_test_trace]

plot_figure = go.Figure(data=plot_data, layout=plot_layout)

plotly.offline.plot(plot_figure)

# 训练模型
# 梯度下降需要传递两个参数 α、num_iterations(迭代次数)
num_iterations = 500
# 定义学习率α
learning_rate = 0.01
polynomial_degree = 0
sinusoid_degree = 0
"""
参数：
data ----> x_train
labels-----> y_train
polynomial_degree=0(默认)
sinusoid_degree=0(默认)
normalize_data=True(默认)
"""
# 实例化线性回归类
liner_regression = LinearRegression(x_train, y_train, polynomial_degree, sinusoid_degree)
# 模型训练 参数：学习率α、梯度下降迭代次数,返回θ值和迭代500次所有损失值记录
(theta, cost_history) = liner_regression.train(learning_rate, num_iterations)
print('开始时的损失：', cost_history[0])
print('训练后的损失：', cost_history[-1])

# 构建损失函数图像
plt.plot(range(num_iterations), cost_history)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('Gradient Descent Progress')
plt.show()

predictions_num = 10

x_min = x_train[:, 0].min();
x_max = x_train[:, 0].max();

y_min = x_train[:, 1].min();
y_max = x_train[:, 1].max();

x_axis = np.linspace(x_min, x_max, predictions_num)
y_axis = np.linspace(y_min, y_max, predictions_num)

x_predictions = np.zeros((predictions_num * predictions_num, 1))
y_predictions = np.zeros((predictions_num * predictions_num, 1))

x_y_index = 0
for x_index, x_value in enumerate(x_axis):
    for y_index, y_value in enumerate(y_axis):
        x_predictions[x_y_index] = x_value
        y_predictions[x_y_index] = y_value
        x_y_index += 1

z_predictions = liner_regression.predict(np.hstack((x_predictions, y_predictions)))

plot_predictions_trace = go.Scatter3d(
    x=x_predictions.flatten(),
    y=y_predictions.flatten(),
    z=z_predictions.flatten(),
    name='Prediction Plane',
    mode='markers',
    marker={
        'size': 1,
    },
    opacity=0.8,
    surfaceaxis=2,
)

plot_data = [plot_training_trace, plot_test_trace, plot_predictions_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.plot(plot_figure)